/* character.c -- character module.
   Copyright (C) 2003, 2004
     National Institute of Advanced Industrial Science and Technology (AIST)
     Registration Number H15PRO112

   This file is part of the m17n library.

   The m17n library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public License
   as published by the Free Software Foundation; either version 2.1 of
   the License, or (at your option) any later version.

   The m17n library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the m17n library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.  */

/***en
    @addtogroup m17nCharacter
    @brief Character objects and API for them.

    The m17n library represents a @e character by a character code (an
    integer).  The minimum character code is @c 0.  The maximum
    character code is defined by the macro #MCHAR_MAX.  It is
    assured that #MCHAR_MAX is not smaller than @c 0x3FFFFF (22
    bits).

    Characters @c 0 to @c 0x10FFFF are equivalent to the Unicode
    characters of the same code values.

    A character can have zero or more properties called @e character
    @e properties.  A character property consists of a @e key and a
    @e value, where key is a symbol and value is anything that can be
    cast to <tt>(void *)</tt>.  "The character property that belongs
    to character C and whose key is K" may be shortened to "the K
    property of C".  */

/***ja
    @addtogroup m17nCharacter
    @brief IuWFNgƂɊւ API.

    m17n Cu @e  𕶎R[hijŕ\Bŏ̕
    R[h @c 0 ŁAő̕R[h̓}N #MCHAR_MAX ɂ
    `ĂB#MCHAR_MAX  @c 0x3FFFFFi22rbgj ȏł
    Ƃۏ؂ĂB

    @c 0  @c 0x10FFFF ܂ł́̕AƓl Unicode 
    Ɋ蓖ĂĂB

    e @e vpeB ƌĂԃvpeB 0 ȏ㎝Ƃ
    BvpeB @e L[  @e l ȂBL[̓V{
    Al <tt>(void *)</tt> ^ɃLXgł̂Ȃ牽ł悢B 
    u C ̕vpeB̂L[ K ł́vȒP
    u C  K vpeBvƌĂԂƂB  */
/*=*/

#if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
/*** @addtogroup m17nInternal
     @{ */

#include "config.h"
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <ctype.h>
#include <stdio.h>

#include "m17n-core.h"
#include "m17n-misc.h"
#include "internal.h"

/* Bookkeeping for one character property defined through
   mchar_define_property ().  */
typedef struct
{
  /* Value type given when the property was defined; mchar__fini ()
     checks it against Mstring to decide whether to free the stored
     values.  */
  MSymbol type;
  /* Result of (*mdatabase__finder) () for this property.  Non-NULL
     while the table has not yet been loaded from it; reset to NULL
     after a successful load or when the finder found nothing.  */
  void *mdb;
  /* Char-table holding the property values.  NULL while MDB is still
     pending a load.  */
  MCharTable *table;
} MCharPropRecord;

/* Plist of all defined character properties: each key is the
   property's symbol and each value is a pointer to its
   MCharPropRecord.  Created in mchar__init () and released in
   mchar__fini ().  */
static MPlist *char_prop_list;

/* Callback passed to mchartable_map (): release with free () the
   value STR stored in a char-table.  FROM, TO and ARG are part of the
   callback signature but are not used here.  */
static void
free_string (int from, int to, void *str, void *arg)
{
  (void) from;
  (void) to;
  (void) arg;

  free (str);
}


/* Internal API */

/* Initialize the character module: create the list of character
   properties and define the built-in properties, storing the key
   returned for each one in the corresponding global symbol.

   Always returns 0.  */
int
mchar__init (void)
{
  char_prop_list = mplist ();

  Mname = mchar_define_property ("name", Mstring);
  Mcategory = mchar_define_property ("category", Msymbol);
  Mcombining_class = mchar_define_property ("combining-class", Minteger);
  Mbidi_category
    = mchar_define_property ("bidirectional-category", Msymbol);
  Msimple_case_folding
    = mchar_define_property ("simple-case-folding", Minteger);
  Mcomplicated_case_folding
    = mchar_define_property ("complicated-case-folding", Mtext);
  Mscript = mchar_define_property ("script", Msymbol);

  return 0;
}

/* Finalize the character module: for every entry of CHAR_PROP_LIST,
   release the property's char-table (if one exists) and free the
   record, then release the list itself.  */
void
mchar__fini (void)
{
  MPlist *plist = char_prop_list;

  while (mplist_key (plist) != Mnil)
    {
      MCharPropRecord *prop = mplist_value (plist);

      if (prop->table)
	{
	  /* For string-valued properties, the stored strings are
	     freed before the table itself is released.  */
	  if (prop->type == Mstring)
	    mchartable_map (prop->table, NULL, free_string, NULL);
	  M17N_OBJECT_UNREF (prop->table);
	}
      free (prop);

      plist = mplist_next (plist);
    }

  M17N_OBJECT_UNREF (char_prop_list);
}

/*** @} */
#endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */

/* External API */

/*** @addtogroup m17nCharacter */
/*** @{ */
/*=*/

#ifdef FOR_DOXYGEN
/***en
    @brief Maximum character code.

    The macro #MCHAR_MAX gives the maximum character code.  */

/***ja
    @brief R[h̍ől.

    }N #MCHAR_MAX ͕R[h̍ől^B  */

/* Empty definition, compiled only when FOR_DOXYGEN is defined.
   NOTE(review): presumably a placeholder so the documentation has a
   macro to attach to, with the real value defined in a header --
   confirm.  */
#define MCHAR_MAX
/*=*/
#endif /* FOR_DOXYGEN */

/***en
    @ingroup m17nCharacter
    @name Variables: Keys of character properties

    These symbols are used as keys of character properties.  */

/***ja
     @name ϐ: vpeB̃L[

     ̃V{͕vpeB̃L[ƂĎgB*/
/*=*/
/*** @{ */

/***en
    @brief Key for script.

    The symbol #Mscript has the name <tt>"script"</tt> and is used as the key
    of a character property.  The value of such a property is a symbol
    representing the script to which the character belongs.

    Each symbol that represents a script has one of the names listed in
    the <em>Unicode Technical Report #24</em>.  */

/***ja
    @brief XNvg\킷L[.

    V{ #Mscript  <tt>"script"</tt> ƂOAv
    peB̃L[ƂĎgB̃vpeB̒ĺȂ̕
    XNvg\킷V{łB

    XNvg\킷V{̖ÓA<em>Unicode Technical Report
    #24</em> ɃXgĂ̂̂ꂩłB  */

/* Assigned in mchar__init () from
   mchar_define_property ("script", Msymbol).  */
MSymbol Mscript;

/*=*/

/***en
    @brief Key for character name.

    The symbol #Mname has the name <tt>"name"</tt> and is used as
    the key of a character property.  The value of such a property is a
    C-string representing the name of the character.  */

/***ja
    @brief O\킷L[.

    V{ #Mname  <tt>"name"</tt> ƂOAvp
    eB̃L[ƂĎgB̃vpeB̒l C-string łA
    ̖̕O\킷B  */

/* Assigned in mchar__init () from
   mchar_define_property ("name", Mstring).  */
MSymbol Mname;

/*=*/

/***en
    @brief Key for general category.

    The symbol #Mcategory has the name <tt>"category"</tt> and is
    used as the key of a character property.  The value of such a
    property is a symbol representing the <em>general category</em> of
    the character.

    Each symbol that represents a general category has one of the
    names listed as abbreviations for <em>General Category</em> in
    Unicode.  */

/***ja
    @brief ʃJeS\킷L[.

    V{ #Mcategory  <tt>"category"</tt> ƂOA
    vpeB̃L[ƂĎgB̃vpeB̒ĺAΉ
    <em>ʃJeS</em> \킷V{łB

    ʃJeS\킷V{̖ÓA<em>General Category</em>
    ȗ`Ƃ Unicode ɒ`Ă̂łB  */

/* Assigned in mchar__init () from
   mchar_define_property ("category", Msymbol).  */
MSymbol Mcategory;

/*=*/

/***en
    @brief Key for canonical combining class.

    The symbol #Mcombining_class has the name
    <tt>"combining-class"</tt> and is used as the key of a character
    property.  The value of such a property is an integer that
    represents the <em>canonical combining class</em> of the character.

    The meaning of each integer that represents a canonical combining
    class is identical to the one defined in Unicode.  */

/***ja
    @brief WNX\킷L[.

    V{ #Mcombining_class  <tt>"combining-class"</tt> Ƃ
    OAvpeB̃L[ƂĎgB̃vpeB̒l
    ́AΉ @e WNX \킷łB

    WNX\킷̈Ӗ́AUnicode ɒ`Ă
    ƓłB  */

/* Assigned in mchar__init () from
   mchar_define_property ("combining-class", Minteger).  */
MSymbol Mcombining_class;
/*=*/

/***en
    @brief Key for bidi category.

    The symbol #Mbidi_category has the name
    <tt>"bidirectional-category"</tt> and is used as the key of a
    character property.  The value of such a property is a symbol that
    represents the <em>bidirectional category</em> of the character.

    Each symbol that represents a bidirectional category has one of
    the names listed as types of <em>Bidirectional Category</em> in
    Unicode.  */

/***ja
    @brief oJeS\킷L[.

    V{ #Mbidi_category  <tt>"bidi-category"</tt> ƂO
    AvpeB̃L[ƂĎgB̃vpeB̒ĺA
    Ή @e oJeS \킷V{łB

    oJeS\킷V{̖ÓA<em>Bidirectional
    Category</em> ̌^Ƃ Unicode ɒ`Ă̂łB  */

/* Assigned in mchar__init () from
   mchar_define_property ("bidirectional-category", Msymbol).  */
MSymbol Mbidi_category;
/*=*/

/***en
    @brief Key for corresponding single lowercase character.

    The symbol #Msimple_case_folding has the name
    <tt>"simple-case-folding"</tt> and is used as the key of a
    character property.  The value of such a property is the
    corresponding single lowercase character that is used when
    comparing M-texts ignoring cases.

    If a character requires a complicated comparison (i.e. cannot be
    compared by simply mapping to another single character), the value
    of such a property is @c 0xFFFF.  In this case, the character has
    another property whose key is #Mcomplicated_case_folding.  */

/***ja
    @brief Ή鏬ꕶ\킷L[.

    V{ #Msimple_case_folding  <tt>"simple-case-folding"</tt> 
    ƂOAvpeB̃L[ƂĎgB̃vpeB
    ̒ĺAΉ鏬ꕶłA啶^̋ʂ𖳎
    r̍ۂɎgB

    GȔr@KvƂ镶łꍇiʂ̈ꕶƑΉt
    ƂɂĔrłȂꍇjÃvpeB̒l @c 0xFFFF 
    ȂB̏ꍇ́̕A#Mcomplicated_case_folding ƂL[
    vpeBB  */

/* Assigned in mchar__init () from
   mchar_define_property ("simple-case-folding", Minteger).  */
MSymbol Msimple_case_folding;
/***en
    @brief Key for corresponding multiple lowercase characters.

    The symbol #Mcomplicated_case_folding has the name
    <tt>"complicated-case-folding"</tt> and is used as the key of a
    character property.  The value of such a property is the
    corresponding M-text that contains a sequence of lowercase
    characters to be used for comparing M-texts ignoring case.  */

/***ja
    @brief Ή鏬̗\킷L[.

    V{ #Mcomplicated_case_folding  
    <tt>"complicated-case-folding"</tt> ƂOAvpeB
    ̃L[ƂĎgB̃vpeB̒ĺAΉ鏬񂩂
     M-text łA啶^̋ʂ𖳎r̍ۂɎg
    B
      */

/* Assigned in mchar__init () from
   mchar_define_property ("complicated-case-folding", Mtext).  */
MSymbol Mcomplicated_case_folding;
/*=*/
/*** @} */
/*=*/

/***en
    @brief Define a character property.

    The mchar_define_property () function searches the m17n database
    for data whose tags are \<#Mchar_table, $TYPE, $SYM \>.
    Here, $SYM is a symbol whose name is $NAME.  $TYPE must be
    #Mstring, #Mtext, #Msymbol, #Minteger, or #Mplist.

    @return
    If the operation was successful, mchar_define_property () returns
    $SYM.  Otherwise it returns #Mnil.  */

/***ja
    @brief vpeB`.

    ֐ mchar_define_property () ́A \<#Mchar_table, $TYPE, $SYM \>
    Ƃ^Of[^x[X m17n x[XTB  
     $SYM  $NAME ƂÕV{łB$TYPE #Mstring,
    #Mtext, #Msymbol, #Minteger, #Mplist ̂ꂩłȂ΂ȂȂB

    @return
    ɐ mchar_define_property () $SYM ԂB
    sꍇ #Mnil ԂB  */

/***
    @errors
    @c MERROR_DB

    @seealso
    mchar_get_prop (), mchar_put_prop ()  */

MSymbol
mchar_define_property (const char *name, MSymbol type)
{
  MSymbol key = msymbol (name);
  MCharPropRecord *prop = mplist_get (char_prop_list, key);

  if (! prop)
    {
      /* First definition of this property: register a zeroed
	 record under KEY.  */
      MSTRUCT_CALLOC (prop, MERROR_CHAR);
      mplist_put (char_prop_list, key, prop);
    }
  else if (prop->table)
    {
      /* Redefinition: drop the table of the previous definition.  */
      M17N_OBJECT_UNREF (prop->table);
    }

  prop->type = type;

  /* Ask the database module, when one is hooked in, whether it has
     data for this property.  */
  prop->mdb = (mdatabase__finder
	       ? (*mdatabase__finder) (Mchar_table, type, key, Mnil)
	       : NULL);

  if (prop->mdb)
    /* The table is loaded from the database later, on the first
       mchar_get_prop () or mchar_put_prop () call.  */
    prop->table = NULL;
  else
    /* No database entry: start with a fresh table whose default
       value is -1 for integer properties and NULL otherwise.  */
    prop->table = mchartable (type,
			      type == Minteger ? (void *) -1 : NULL);

  return key;
}

/*=*/

/***en
    @brief Get the value of a character property.

    The mchar_get_prop () function searches character $C for the
    character property whose key is $KEY.

    @return
    If the operation was successful, mchar_get_prop () returns the
    value of the character property.  Otherwise it returns @c
    NULL.  */

/***ja
    @brief vpeB̒l𓾂.

    ֐ mchar_get_prop () ́A $C ̕vpeB̂L[ 
    $KEY ł̂TB

    @return
     mchar_get_prop () ͌vpeB̒l
    Bsꍇ @c NULL ԂB

    @latexonly \IPAlabel{mchar_get_prop} @endlatexonly
*/
/***
    @errors
    @c MERROR_SYMBOL, @c MERROR_DB

    @seealso
    mchar_define_property (), mchar_put_prop ()  */

void *
mchar_get_prop (int c, MSymbol key)
{
  MCharPropRecord *prop = mplist_get (char_prop_list, key);

  /* KEY has not been defined as a character property.  */
  if (prop == NULL)
    return NULL;

  if (prop->mdb != NULL)
    {
      /* The table has not been loaded from the database yet; do it
	 now.  On failure MDB is left set, so a later call tries
	 again.  */
      MCharTable *loaded = (*mdatabase__loader) (prop->mdb);

      prop->table = loaded;
      if (loaded == NULL)
	MERROR (MERROR_DB, NULL);
      prop->mdb = NULL;
    }

  return mchartable_lookup (prop->table, c);
}

/*=*/

/***en
    @brief Set the value of a character property.

    The mchar_put_prop () function searches character $C for the
    character property whose key is $KEY and assigns $VAL to the value
    of the found property.

    @return
    If the operation was successful, mchar_put_prop () returns 0.
    Otherwise, it returns -1.  */
/***ja
    @brief vpeB̒lݒ肷.

    ֐ mchar_put_prop () ́A $C ̕vpeB̂L[ $KEY 
    ̂TA̒lƂ $VAL ݒ肷B

    @return
     mchar_put_prop () 0ԂBsꍇ-1
    B  */
/***
    @errors
    @c MERROR_SYMBOL, @c MERROR_DB

    @seealso
    mchar_define_property (), mchar_get_prop ()   */

int
mchar_put_prop (int c, MSymbol key, void *val)
{
  MCharPropRecord *prop = mplist_get (char_prop_list, key);

  /* KEY has not been defined as a character property.  */
  if (prop == NULL)
    return -1;

  if (prop->mdb != NULL)
    {
      /* Load the table from the database before modifying it.  On
	 failure MDB is left set, so a later call tries again.  */
      MCharTable *loaded = (*mdatabase__loader) (prop->mdb);

      prop->table = loaded;
      if (loaded == NULL)
	MERROR (MERROR_DB, -1);
      prop->mdb = NULL;
    }

  return mchartable_set (prop->table, c, val);
}

/*** @} */

/*
  Local Variables:
  coding: euc-japan
  End:
*/
